# Load necessary libraries
library(methods) # For S4 class and methods
library(Rcpp) # For integrating C++ functions into R
# Define a C++ function to compute the greatest common divisor (GCD).
# C_gcd(x, y) takes two integers and returns their GCD as an integer.
# std::gcd is declared in <numeric> and only exists from C++17 onward, so the
# header and the language standard are requested explicitly rather than
# relying on compiler defaults and on whatever headers Rcpp pulls in.
cppFunction('
int C_gcd(int x, int y) {
  return std::gcd(x, y); // Use the standard library gcd function
}
', includes = "#include <numeric>", plugins = "cpp17")
# Define a C++ function to compute the least common multiple (LCM).
# C_lcm(x, y) takes two integers and returns their LCM as an integer.
# std::lcm is declared in <numeric> and only exists from C++17 onward, so the
# header and the language standard are requested explicitly rather than
# relying on compiler defaults and on whatever headers Rcpp pulls in.
cppFunction('
int C_lcm(int x, int y) {
  return std::lcm(x, y); // Use the standard library lcm function
}
', includes = "#include <numeric>", plugins = "cpp17")
STATS 506
Problem Set 5
The link to the problem set 5 GitHub repository is at: https://github.com/skyshi1/STAT506/tree/main/problemSet5, as a sub-folder of the STATS 506 repository.
Problem 1 - OOP Programming
a. Define the rational class
Use the Rcpp for the C functions:
Get the class:
# Define the Rational S4 class: an integer numerator over a non-zero integer
# denominator.
setClass(
  "Rational",
  slots = c(
    numerator = "integer",   # Slot to store the numerator
    denominator = "integer"  # Slot to store the denominator
  ),
  # Validity method. Following the S4 convention it returns TRUE for a valid
  # object and otherwise a character vector describing the problems, which
  # validObject() reports as an error.
  validity = function(object) {
    problems <- character(0)
    # The slot declarations already force integer storage, so only length and
    # value need checking. An empty, multi-element or NA slot would otherwise
    # make the zero test below fail with an unrelated error.
    for (slot_name in c("numerator", "denominator")) {
      value <- slot(object, slot_name)
      if (length(value) != 1L || is.na(value)) {
        problems <- c(
          problems,
          paste0(slot_name, " must be a single non-missing integer.")
        )
      }
    }
    if (length(problems) > 0L) {
      return(problems)
    }
    # Division by zero is undefined
    if (object@denominator == 0L) {
      return("Denominator cannot be zero.")
    }
    TRUE
  }
)
# Constructor function for the Rational class.
#
# numerator, denominator: single whole numbers (integer, or a double holding
#   a whole value such as 24); the denominator must be non-zero.
# Returns a Rational in lowest terms whose denominator is positive, so the
#   sign of the fraction is always carried by the numerator.
# Stops with an error when an argument is not a single whole number that fits
#   in an R integer, or when the denominator is zero.
Rational <- function(numerator, denominator) {
  # Validate before anything is coerced: as.integer() truncates silently, so
  # the GCD must never be computed from a non-integer input.
  is_whole <- function(value) {
    is.numeric(value) && length(value) == 1L && is.finite(value) &&
      abs(value) <= .Machine$integer.max && value %% 1 == 0
  }
  if (!is_whole(numerator) || !is_whole(denominator)) {
    stop("Both numerator and denominator must be integers.")
  }
  # Validate that the denominator is not zero
  if (denominator == 0) stop("Denominator cannot be zero.")

  num <- as.integer(numerator)
  den <- as.integer(denominator)

  # Simplify the fraction using the GCD; integer division keeps both parts
  # stored as integers.
  divisor <- C_gcd(num, den)
  num <- num %/% divisor
  den <- den %/% divisor

  # Keep the denominator positive so that, for example, 3/-4 is stored and
  # printed as -3/4.
  if (den < 0L) {
    num <- -num
    den <- -den
  }

  # Create and return a new Rational object
  new("Rational", numerator = num, denominator = den)
}
# show() method for Rational: writes the numerator, a slash and the
# denominator, separated by spaces and followed by a newline.
setMethod("show", "Rational", function(object) {
  pieces <- list(object@numerator, "/", object@denominator, "\n")
  do.call(cat, pieces)
})
# simplify(): generic plus the Rational method, which divides both slots by
# their GCD and rebuilds the object through the Rational() constructor.
setGeneric("simplify", function(object) standardGeneric("simplify"))
[1] "simplify"
setMethod("simplify", "Rational", function(object) {
  # Common factor shared by numerator and denominator
  common <- C_gcd(object@numerator, object@denominator)
  reduced_num <- object@numerator / common
  reduced_den <- object@denominator / common
  Rational(reduced_num, reduced_den)
})
# quotient(): generic plus the Rational method, which returns the fraction as
# a decimal number.
#
# object: a Rational.
# digits: NULL (default) to return the unrounded value, otherwise a single
#   whole number of decimal places passed to round().
# Stops with an error when digits is not a single numeric value, or when it
#   is not a finite whole number (for example 3.14).
setGeneric("quotient", function(object, digits = NULL) standardGeneric("quotient"))
[1] "quotient"
setMethod(
  "quotient",
  "Rational",
  function(object, digits = NULL) {
    # Calculate the decimal value of the rational number
    result <- object@numerator / object@denominator
    if (is.null(digits)) {
      return(result) # No rounding requested
    }
    if (!is.numeric(digits) || length(digits) != 1) {
      stop("digits must be a single numeric value.")
    }
    # round() accepts a fractional digits value without complaint, so a value
    # such as 3.14 (or NA/Inf) has to be rejected explicitly.
    if (!is.finite(digits) || digits %% 1 != 0) {
      stop("digits must be a whole number.")
    }
    round(result, digits)
  }
)
# "+" for two Rational objects: rescale both numerators to the least common
# multiple of the denominators, add them, and build the result with
# Rational().
setMethod("+", signature("Rational", "Rational"), function(e1, e2) {
  shared_den <- C_lcm(e1@denominator, e2@denominator)
  # Each numerator multiplied by the factor that lifts its denominator to
  # the shared one
  left_term <- e1@numerator * (shared_den / e1@denominator)
  right_term <- e2@numerator * (shared_den / e2@denominator)
  Rational(left_term + right_term, shared_den)
})
# "-" for two Rational objects: rescale both numerators to the least common
# multiple of the denominators, subtract the second from the first, and
# build the result with Rational().
setMethod("-", signature("Rational", "Rational"), function(e1, e2) {
  shared_den <- C_lcm(e1@denominator, e2@denominator)
  # Each numerator multiplied by the factor that lifts its denominator to
  # the shared one
  minuend <- e1@numerator * (shared_den / e1@denominator)
  subtrahend <- e2@numerator * (shared_den / e2@denominator)
  Rational(minuend - subtrahend, shared_den)
})
# "*" for two Rational objects: multiply numerator by numerator and
# denominator by denominator, then build the result with Rational().
setMethod("*", signature("Rational", "Rational"), function(e1, e2) {
  Rational(
    e1@numerator * e2@numerator,
    e1@denominator * e2@denominator
  )
})
# "/" for two Rational objects: multiply the first operand by the reciprocal
# of the second. A divisor whose numerator is zero is rejected with an error.
setMethod("/", signature("Rational", "Rational"), function(e1, e2) {
  divisor_is_zero <- e2@numerator == 0
  if (divisor_is_zero) stop("Division by zero is not allowed.")
  Rational(
    e1@numerator * e2@denominator,
    e1@denominator * e2@numerator
  )
})
b. Use your rational class to create three objects
We create these objects first:
# Create three Rational objects
r1 <- Rational(24, 6)
r2 <- Rational(7, 230)
r3 <- Rational(0, 4)
Then do the operations:
# 1. Display r1 and r3
cat("r1:\n")
r1:
print(r1) # Should display the simplified version of 24/6
4 / 1
cat("\nr3:\n")
r3:
print(r3) # Should display 0/1
0 / 1
# 2. Arithmetic operations
cat("\nr1 + r2:\n")
r1 + r2:
print(r1 + r2) # Add r1 and r2927 / 230
cat("\nr1 - r2:\n")
r1 - r2:
print(r1 - r2) # Subtract r2 from r1913 / 230
cat("\nr1 * r2:\n")
r1 * r2:
print(r1 * r2) # Multiply r1 and r214 / 115
cat("\nr1 / r2:\n")
r1 / r2:
print(r1 / r2) # Divide r1 by r2920 / 7
cat("\nr1 + r3:\n")
r1 + r3:
print(r1 + r3) # Add r1 and r34 / 1
cat("\nr1 * r3:\n")
r1 * r3:
print(r1 * r3) # Multiply r1 and r3 (should result in 0/1)0 / 1
cat("\nr2 / r3:\n")
r2 / r3:
tryCatch(
print(r2 / r3), # This should throw an error because division by 0 is not allowed
error = function(e) cat("Error:", e$message, "\n")
)Error: Division by zero is not allowed.
# 3. Quotient method
cat("\nQuotient of r1:\n")
Quotient of r1:
print(quotient(r1))[1] 4
cat("\nQuotient of r2:\n")
Quotient of r2:
print(quotient(r2))[1] 0.03043478
cat("\nQuotient of r2 (3 digits):\n")
Quotient of r2 (3 digits):
print(quotient(r2, digits = 3))[1] 0.03
cat("\nQuotient of r2 (invalid digits = 3.14):\n")
Quotient of r2 (invalid digits = 3.14):
tryCatch(
print(quotient(r2, digits = 3.14)), # Should throw an error due to invalid digits argument
error = function(e) cat("Error:", e$message, "\n")
)[1] 0.03
cat("\nQuotient of r2 (invalid digits = 'avocado'):\n")
Quotient of r2 (invalid digits = 'avocado'):
tryCatch(
print(quotient(r2, digits = "avocado")), # Should throw an error due to invalid digits argument
error = function(e) cat("Error:", e$message, "\n")
)Error: digits must be a single numeric value.
q2 <- quotient(r2, digits = 3)
cat("\nStored quotient q2:\n")
Stored quotient q2:
print(q2)[1] 0.03
cat("\nQuotient of r3:\n")
Quotient of r3:
print(quotient(r3))[1] 0
# 4. Simplify method
cat("\nSimplified r1:\n")
Simplified r1:
print(simplify(r1))4 / 1
cat("\nSimplified r2:\n")
Simplified r2:
print(simplify(r2))7 / 230
cat("\nSimplified r3:\n")
Simplified r3:
print(simplify(r3))0 / 1
c. Check validator
# Test Cases for Validation
# The label printed for each case names the exact call that follows it.
# Case 1: Valid rational number
cat("Test Case 1: Valid Rational Number (24/6)\n")
Test Case 1: Valid Rational Number (24/6)
try(r_valid <- Rational(24, 6)) # Should work
print(r_valid)
4 / 1
# Case 2: Zero denominator
cat("\nTest Case 2: Zero Denominator\n")
Test Case 2: Zero Denominator
try(r_invalid_zero_den <- Rational(1, 0)) # Should raise an error
Error in Rational(1, 0) : Denominator cannot be zero.
# Case 3: Non-integer numerator
cat("\nTest Case 3: Non-integer Numerator (2.5/5)\n")
Test Case 3: Non-integer Numerator (2.5/5)
try(r_invalid_non_integer_num <- Rational(2.5, 5)) # Should raise an error
Error in Rational(2.5, 5) :
Both numerator and denominator must be integers.
# Case 4: Non-integer denominator
cat("\nTest Case 4: Non-integer Denominator (5/2.5)\n")
Test Case 4: Non-integer Denominator (5/2.5)
try(r_invalid_non_integer_den <- Rational(5, 2.5)) # Should raise an error
Error in Rational(5, 2.5) :
Both numerator and denominator must be integers.
# Case 5: Non-integer numerator and denominator
cat("\nTest Case 5: Non-integer Numerator and Denominator (3.5/1.5)\n")
Test Case 5: Non-integer Numerator and Denominator (3.5/1.5)
try(r_invalid_both_non_integer <- Rational(3.5, 1.5)) # Should raise an error
Error in Rational(3.5, 1.5) :
Both numerator and denominator must be integers.
# Case 6: Negative denominator (validation should adjust this automatically)
cat("\nTest Case 6: Negative Denominator (3/-4)\n")
Test Case 6: Negative Denominator (3/-4)
try(r_negative_denominator <- Rational(3, -4)) # Should work with adjusted signs
print(r_negative_denominator)
3 / -4
Problem 2 - plotly
a. Does the distribution of genre of sales across years appear to change?
Plot from last time for comparison:
suppressPackageStartupMessages({
library(tidyverse)
library(plotly)
library(ggplot2)
})
# Read the sales data from the CSV file.
art_sales <- read.csv("../data/df_for_ml_improved_new_market.csv")

# Long format: one row per sale and Genre___* column whose value is 1, with
# the "Genre___" prefix stripped from the genre label.
art_sales_long <- art_sales %>%
  pivot_longer(
    cols = starts_with("Genre___"),
    names_to = "genre",
    values_to = "present",
    values_drop_na = TRUE
  ) %>%
  filter(present == 1) %>%
  mutate(genre = str_remove(genre, "Genre___"))

# Number of sales for every year/genre combination
genre_distribution <- count(art_sales_long, year, genre, name = "count")

# Stacked bars rescaled so each year sums to 100%
ggplot(genre_distribution, aes(x = factor(year), y = count, fill = genre)) +
  geom_col(position = "fill") +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Distribution of Genre Sales Across Years",
    x = "Year",
    y = "Proportion of Sales",
    fill = "Genre"
  ) +
  theme_minimal(base_size = 10) +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.title = element_text(size = 12)
  )
New plot by plotly:
# Rebuild the long genre table: one row per sale and Genre___* column whose
# value is 1, with the "Genre___" prefix stripped from the genre label.
art_sales_long <- art_sales %>%
  pivot_longer(
    cols = starts_with("Genre___"),
    names_to = "genre",
    values_to = "present",
    values_drop_na = TRUE
  ) %>%
  filter(present == 1) %>%
  mutate(genre = str_remove(genre, "Genre___"))

# Sales count per year and genre, plus each genre's share within its year
genre_distribution <- art_sales_long %>%
  count(year, genre, name = "count") %>%
  group_by(year) %>%
  mutate(proportion = count / sum(count)) %>%
  ungroup()

# Interactive stacked bar chart; the hover text shows year, genre and the
# proportion formatted as a percentage.
genre_distribution_plot <- layout(
  plot_ly(
    genre_distribution,
    x = ~year,
    y = ~proportion,
    color = ~genre,
    type = "bar",
    text = ~paste(
      "Year:", year,
      "<br>Genre:", genre,
      "<br>Proportion:", scales::percent(proportion, accuracy = 0.1)
    ),
    hoverinfo = "text"
  ),
  title = "Proportional Distribution of Genre Sales Across Years (Interactive)",
  xaxis = list(title = "Year"),
  yaxis = list(title = "Proportion", tickformat = "%"),
  barmode = "stack",
  legend = list(title = list(text = "Genre"))
)
genre_distribution_plot
From this plot, we see that, starting from 1997, the distribution of genres changed over time. For example, (almost) no one bought prints before 2000, but more people were buying prints around 2008. We can also see a decline in the proportion of sales of paintings and of the "others" genre. The proportions of sculpture and photography are stable over time after 2000.
b. Generate an interactive plot with plotly that can address 2 questions from last time
# Mean price_usd per year and genre, computed on the long genre table (one
# row per sale and Genre___* column whose value is 1).
price_by_genre <- art_sales %>%
  pivot_longer(
    cols = starts_with("Genre___"),
    names_to = "genre",
    values_to = "present",
    values_drop_na = TRUE
  ) %>%
  filter(present == 1) %>%
  mutate(genre = str_remove(genre, "Genre___")) %>%
  group_by(year, genre) %>%
  summarise(avg_price = mean(price_usd, na.rm = TRUE), .groups = "drop")

# Mean price_usd per year over all rows, labelled as the pseudo-genre
# "Overall" so it can be drawn alongside the genre lines.
overall_avg_price <- art_sales %>%
  group_by(year) %>%
  summarise(avg_price = mean(price_usd, na.rm = TRUE), .groups = "drop") %>%
  mutate(genre = "Overall")

# Stack the genre-specific and overall series into one table
price_data_combined <- bind_rows(price_by_genre, overall_avg_price)

# Interactive line chart; the hover text shows year, genre and the average
# price formatted in dollars.
price_plot <- layout(
  plot_ly(
    price_data_combined,
    x = ~year,
    y = ~avg_price,
    color = ~genre,
    type = "scatter",
    mode = "lines+markers",
    text = ~paste(
      "Year:", year,
      "<br>Genre:", genre,
      "<br>Average Price (USD):", scales::dollar(avg_price)
    ),
    hoverinfo = "text"
  ),
  title = "Change in Sales Price Over Time (Overall and by Genre)",
  xaxis = list(title = "Year"),
  yaxis = list(title = "Average Price (USD)", tickformat = "$"),
  legend = list(title = list(text = "Genre"))
)
price_plot
From this plot, we see that, starting from 1997, the overall trend of the sale price is increasing with time. In addition, we can see a peak in sale price in 2008 followed by a drop, possibly due to the economic recession. The plot also clearly shows that genre affects the change in sales price over time. The "others" and painting genres have the lowest price increase over time. The print price varies over time, with an overall increasing trend but large differences from year to year. The sculpture genre, on the other hand, increases steadily over time, and photography has the fastest and largest change in price. All of them have their highest prices in 2008.
Problem 3 - data.table
a. Tables for departure delay and arrival delay
# Load necessary libraries
suppressPackageStartupMessages({
library(data.table)
library(nycflights13)
})
# Convert data to data.table
# (flights, airports and planes are presumably the nycflights13 datasets
# loaded above -- confirm)
flights_dt <- as.data.table(flights)
airports_dt <- as.data.table(airports)
planes_dt <- as.data.table(planes)
### Part 1a: Departure Delay Summary
# Per-origin mean and median departure delay and flight count, restricted to
# origins with at least 10 flights, labelled with the airport name and sorted
# from the largest mean delay to the smallest.
departure_delay_summary <- flights_dt[
!is.na(dep_delay), # Exclude rows with NA in dep_delay
.(
# na.rm = TRUE has no effect here because NA delays were already filtered out
mean_dep_delay = mean(dep_delay, na.rm = TRUE), # Calculate mean departure delay
median_dep_delay = median(dep_delay, na.rm = TRUE), # Calculate median departure delay
flight_count = .N # Count the number of flights
), by = origin
][flight_count >= 10][ # Exclude origins with fewer than 10 flights
# Match origin against the faa code column of airports_dt; nomatch = NULL
# keeps only rows that have a match on both sides
airports_dt, on = .(origin = faa), nomatch = NULL # Join with airport names, remove unmatched rows
][
# Keep the four reporting columns under display-ready names
, .(
`Airport Name` = name,
`Mean Departure Delay (min)` = mean_dep_delay,
`Median Departure Delay (min)` = median_dep_delay,
`Number of Flights` = flight_count
)
][order(-`Mean Departure Delay (min)`)] # Order by descending mean departure delay
### Part 1b: Arrival Delay Summary
# Per-destination mean and median arrival delay and flight count, restricted
# to destinations with at least 10 flights, labelled with the airport name and
# sorted from the largest mean delay to the smallest.
arrival_delay_summary <- flights_dt[
!is.na(arr_delay), # Exclude rows with NA in arr_delay
.(
# na.rm = TRUE has no effect here because NA delays were already filtered out
mean_arr_delay = mean(arr_delay, na.rm = TRUE), # Calculate mean arrival delay
median_arr_delay = median(arr_delay, na.rm = TRUE), # Calculate median arrival delay
flight_count = .N # Count the number of flights
), by = dest
][flight_count >= 10][ # Exclude destinations with fewer than 10 flights
# Match dest against the faa code column of airports_dt; nomatch = NULL
# keeps only rows that have a match on both sides
airports_dt, on = .(dest = faa), nomatch = NULL # Join with airport names, remove unmatched rows
][
# Keep the four reporting columns under display-ready names
, .(
`Airport Name` = name,
`Mean Arrival Delay (min)` = mean_arr_delay,
`Median Arrival Delay (min)` = median_arr_delay,
`Number of Flights` = flight_count
)
][order(-`Mean Arrival Delay (min)`)] # Order by descending mean arrival delay
# Print the tables out nicely:
# Load knitr for better table formatting
library(knitr)
# Render the arrival delay summary as a centred HTML table with shortened
# column headers, then apply striped/hover/condensed bootstrap styling.
kableExtra::kable_styling(
  kable(
    arrival_delay_summary,
    format = "html",
    align = "c",
    col.names = c("Airport Name", "Mean Delay (min)", "Median Delay (min)", "Flights"),
    caption = "Arrival Delay Summary"
  ),
  bootstrap_options = c("striped", "hover", "condensed")
)
| Airport Name | Mean Delay (min) | Median Delay (min) | Flights |
|---|---|---|---|
| Columbia Metropolitan | 41.7641509 | 28.0 | 106 |
| Tulsa Intl | 33.6598639 | 14.0 | 294 |
| Will Rogers World | 30.6190476 | 16.0 | 315 |
| Jackson Hole Airport | 28.0952381 | 15.0 | 21 |
| Mc Ghee Tyson | 24.0692042 | 2.0 | 578 |
| Dane Co Rgnl Truax Fld | 20.1960432 | 1.0 | 556 |
| Richmond Intl | 20.1112532 | 1.0 | 2346 |
| Akron Canton Regional Airport | 19.6983373 | 3.0 | 842 |
| Des Moines Intl | 19.0057361 | 0.0 | 523 |
| Gerald R Ford Intl | 18.1895604 | 1.0 | 728 |
| Birmingham Intl | 16.8773234 | -2.0 | 269 |
| Theodore Francis Green State | 16.2346369 | 1.0 | 358 |
| Greenville-Spartanburg International | 15.9354430 | -0.5 | 790 |
| Cincinnati Northern Kentucky Intl | 15.3645638 | -3.0 | 3725 |
| Savannah Hilton Head Intl | 15.1295060 | -1.0 | 749 |
| Manchester Regional Airport | 14.7875536 | -3.0 | 932 |
| Eppley Afld | 14.6988984 | -2.0 | 817 |
| Yeager | 14.6716418 | -1.5 | 134 |
| Kansas City Intl | 14.5140584 | 0.0 | 1885 |
| Albany Intl | 14.3971292 | -4.0 | 418 |
| General Mitchell Intl | 14.1672204 | 0.0 | 2709 |
| Piedmont Triad | 14.1126005 | -2.0 | 1492 |
| Washington Dulles Intl | 13.8642021 | -3.0 | 5383 |
| Cherry Capital Airport | 12.9684211 | -10.0 | 95 |
| James M Cox Dayton Intl | 12.6804861 | -3.0 | 1399 |
| Louisville International Airport | 12.6693841 | -2.0 | 1104 |
| Chicago Midway Intl | 12.3642236 | -1.0 | 4025 |
| Sacramento Intl | 12.1099291 | 4.0 | 282 |
| Jacksonville Intl | 11.8448342 | -2.0 | 2623 |
| Nashville Intl | 11.8124589 | -2.0 | 6084 |
| Portland Intl Jetport | 11.6604021 | -4.0 | 2288 |
| Greater Rochester Intl | 11.5606446 | -5.0 | 2358 |
| Hartsfield Jackson Atlanta Intl | 11.3001128 | -1.0 | 16837 |
| Lambert St Louis Intl | 11.0784645 | -3.0 | 4142 |
| Norfolk Intl | 10.9490934 | -4.0 | 1434 |
| Baltimore Washington Intl | 10.7267338 | -5.0 | 1687 |
| Memphis Intl | 10.6453144 | -2.5 | 1686 |
| Port Columbus Intl | 10.6013229 | -3.0 | 3326 |
| Charleston Afb Intl | 10.5929685 | -4.0 | 2759 |
| Philadelphia Intl | 10.1271901 | -3.0 | 1541 |
| Raleigh Durham Intl | 10.0523810 | -3.0 | 7770 |
| Indianapolis Intl | 9.9404341 | -3.0 | 1981 |
| Charlottesville-Albemarle | 9.5000000 | -5.0 | 46 |
| Cleveland Hopkins Intl | 9.1816113 | -5.0 | 4394 |
| Ronald Reagan Washington Natl | 9.0669520 | -2.0 | 9111 |
| Burlington Intl | 8.9509960 | -4.0 | 2510 |
| Buffalo Niagara Intl | 8.9459519 | -5.0 | 4570 |
| Syracuse Hancock Intl | 8.9039250 | -5.0 | 1707 |
| Denver Intl | 8.6065002 | -2.0 | 7169 |
| Palm Beach Intl | 8.5629721 | -3.0 | 6487 |
| Bob Hope | 8.1756757 | -3.0 | 370 |
| Fort Lauderdale Hollywood Intl | 8.0821215 | -3.0 | 11897 |
| Bangor Intl | 8.0279330 | -9.0 | 358 |
| Asheville Regional Airport | 8.0038314 | -1.0 | 261 |
| Pittsburgh Intl | 7.6809905 | -5.0 | 2746 |
| Gallatin Field | 7.6000000 | -2.0 | 35 |
| NW Arkansas Regional | 7.4657258 | -2.0 | 992 |
| Tampa Intl | 7.4085250 | -4.0 | 7390 |
| Charlotte Douglas Intl | 7.3603189 | -3.0 | 13674 |
| Minneapolis St Paul Intl | 7.2701689 | -5.0 | 6929 |
| William P Hobby | 7.1761882 | -4.0 | 2083 |
| Bradley Intl | 7.0485437 | -10.0 | 412 |
| San Antonio Intl | 6.9453718 | -9.0 | 659 |
| South Bend Rgnl | 6.5000000 | -3.5 | 10 |
| Louis Armstrong New Orleans Intl | 6.4901750 | -6.0 | 3715 |
| Key West Intl | 6.3529412 | 7.0 | 17 |
| Eagle Co Rgnl | 6.3043478 | -4.0 | 207 |
| Austin Bergstrom Intl | 6.0199088 | -5.0 | 2411 |
| Chicago Ohare Intl | 5.8766148 | -8.0 | 16566 |
| Orlando Intl | 5.4546431 | -5.0 | 13967 |
| Detroit Metro Wayne Co | 5.4299635 | -7.0 | 9031 |
| Portland Intl | 5.1415797 | -5.0 | 1342 |
| Nantucket Mem | 4.8522727 | -3.0 | 264 |
| Wilmington Intl | 4.6355140 | -7.0 | 107 |
| Myrtle Beach Intl | 4.6034483 | -13.0 | 58 |
| Albuquerque International Sunport | 4.3818898 | -5.5 | 254 |
| George Bush Intercontinental | 4.2407904 | -5.0 | 7085 |
| Norman Y Mineta San Jose Intl | 3.4481707 | -7.0 | 328 |
| Southwest Florida Intl | 3.2381496 | -5.0 | 3502 |
| San Diego Intl | 3.1391657 | -5.0 | 2709 |
| Sarasota Bradenton Intl | 3.0824313 | -5.0 | 1201 |
| Metropolitan Oakland Intl | 3.0776699 | -9.0 | 309 |
| General Edward Lawrence Logan Intl | 2.9143922 | -9.0 | 15022 |
| San Francisco Intl | 2.6728915 | -8.0 | 13173 |
| Yampa Valley | 2.1428571 | 2.0 | 14 |
| Phoenix Sky Harbor Intl | 2.0970473 | -6.0 | 4606 |
| Montrose Regional Airport | 1.7857143 | -10.5 | 14 |
| Los Angeles Intl | 0.5471109 | -7.0 | 16026 |
| Dallas Fort Worth Intl | 0.3221268 | -9.0 | 8388 |
| Miami Intl | 0.2990598 | -9.0 | 11593 |
| Mc Carran Intl | 0.2577285 | -8.0 | 5952 |
| Salt Lake City Intl | 0.1762546 | -8.0 | 2451 |
| Long Beach | -0.0620272 | -10.0 | 661 |
| Martha's Vineyard | -0.2857143 | -11.0 | 210 |
| Seattle Tacoma Intl | -1.0990991 | -11.0 | 3885 |
| Honolulu Intl | -1.3651926 | -7.0 | 701 |
| John Wayne Arpt Orange Co | -7.8682266 | -11.0 | 812 |
| Palm Springs Intl | -12.7222222 | -13.5 | 18 |
# Render the departure delay summary as a centred HTML table with shortened
# column headers, then apply striped/hover/condensed bootstrap styling.
kableExtra::kable_styling(
  kable(
    departure_delay_summary,
    format = "html",
    align = "c",
    col.names = c("Airport Name", "Mean Delay (min)", "Median Delay (min)", "Flights"),
    caption = "Departure Delay Summary"
  ),
  bootstrap_options = c("striped", "hover", "condensed")
)
| Airport Name | Mean Delay (min) | Median Delay (min) | Flights |
|---|---|---|---|
| Newark Liberty Intl | 15.10795 | -1 | 117596 |
| John F Kennedy Intl | 12.11216 | -1 | 109416 |
| La Guardia | 10.34688 | -3 | 101509 |
b. Flights with the fastest average speed
# Convert datasets to data.table
# (repeats the conversion done for part a, so this chunk can run on its own)
flights_dt <- as.data.table(flights)
planes_dt <- as.data.table(planes)
# Calculate speed (distance/time in hours) and find the fastest aircraft model
# The result is a one-row table: model, its average speed over all of its
# flights, and the number of rows averaged. No minimum flight count is
# applied, so a model with very few flights can rank first.
fastest_aircraft <- flights_dt[
!is.na(air_time) & air_time > 0, # Filter valid flights
# air_time / 60 converts to hours, assuming air_time is recorded in minutes
# and distance in miles -- TODO confirm against the dataset documentation
.(speed_mph = distance / (air_time / 60), tailnum)
][
!is.na(tailnum), # Ensure valid tailnum
][
# Look up each tail number in planes_dt and keep only model and speed
planes_dt, on = .(tailnum), # Join with planes dataset to get aircraft model
.(model, speed_mph)
][
, .(
avg_speed = mean(speed_mph, na.rm = TRUE), # Calculate average speed
num_flights = .N
), by = model
][
order(-avg_speed) # Order by descending average speed
][
1 # Select the top result
]
# Render the fastest aircraft row as a centred HTML table with readable
# column headers, then apply striped/hover/condensed bootstrap styling.
kableExtra::kable_styling(
  kable(
    fastest_aircraft,
    format = "html",
    align = "c",
    col.names = c("Model", "Average Speed (mph)", "Flights"),
    caption = "Fastest aircraft details"
  ),
  bootstrap_options = c("striped", "hover", "condensed")
)
| Model | Average Speed (mph) | Flights |
|---|---|---|
| 777-222 | 482.6254 | 4 |